Import the data

# Read the happiness data and the 2023 world data from their CSV files.
# Logical arguments are spelled TRUE/FALSE rather than T/F: `T` and `F` are
# ordinary variables that can be reassigned, TRUE and FALSE are reserved words.
hap_data <- fread("data/WorldHappiness.csv", header = TRUE, stringsAsFactors = FALSE)
country_data <- fread("data/world-data-2023.csv", header = TRUE, stringsAsFactors = FALSE)

Exploratory Data Analysis (EDA)

##          Country happiness_score gdp_per_capita   family    health   freedom
##   1:      Norway          7.5370      1.6164632 1.533524 0.7966665 0.6354226
##   2:     Denmark          7.5220      1.4823830 1.551122 0.7925655 0.6260067
##   3:     Iceland          7.5040      1.4806330 1.610574 0.8335521 0.6271626
##   4: Switzerland          7.4940      1.5649796 1.516912 0.8581313 0.6200706
##   5:     Finland          7.4690      1.4435719 1.540247 0.8091577 0.6179509
##  ---                                                                        
## 788:    Botswana          3.4789      0.9975490 0.000000 0.4941017 0.5090894
## 789:    Tanzania          3.4762      0.4571631 0.000000 0.4426779 0.5093431
## 790:      Rwanda          3.3123      0.3432427 0.000000 0.5723833 0.6040879
## 791:    Zimbabwe          3.2992      0.4255640 0.000000 0.3750376 0.3774047
## 792: Afghanistan          2.5669      0.3007058 0.000000 0.2660515 0.0000000
##      generosity government_trust dystopia_residual continent Year
##   1: 0.36201224      0.315963835         2.2770267    Europe 2015
##   2: 0.35528049      0.400770068         2.3137074    Europe 2015
##   3: 0.47554022      0.153526559         2.3227153    Europe 2015
##   4: 0.29054928      0.367007285         2.2767162    Europe 2015
##   5: 0.24548277      0.382611543         2.4301815    Europe 2015
##  ---                                                             
## 788: 0.03340749      0.101786368         0.2572405    Africa 2020
## 789: 0.27154091      0.203880861         0.7189634    Africa 2020
## 790: 0.23570499      0.485542476         0.5484450    Africa 2020
## 791: 0.15134919      0.080928579         0.8410311    Africa 2020
## 792: 0.13523471      0.001225785         1.5072356      Asia 2020
##      social_support cpi_score
##   1:      0.0000000        88
##   2:      0.0000000        91
##   3:      0.0000000        79
##   4:      0.0000000        86
##   5:      0.0000000        90
##  ---                         
## 788:      1.0856948        60
## 789:      0.8726746        38
## 790:      0.5228763        54
## 791:      1.0478352        24
## 792:      0.3564338        19
##   [1] "Norway"                 "Denmark"                "Iceland"               
##   [4] "Switzerland"            "Finland"                "Netherlands"           
##   [7] "Canada"                 "New Zealand"            "Sweden"                
##  [10] "Australia"              "Israel"                 "Costa Rica"            
##  [13] "Austria"                "United States"          "Ireland"               
##  [16] "Germany"                "Belgium"                "Luxembourg"            
##  [19] "United Kingdom"         "Chile"                  "United Arab Emirates"  
##  [22] "Brazil"                 "Argentina"              "Mexico"                
##  [25] "Singapore"              "Malta"                  "Guatemala"             
##  [28] "Uruguay"                "Panama"                 "France"                
##  [31] "Thailand"               "Spain"                  "Colombia"              
##  [34] "Saudi Arabia"           "Kuwait"                 "Slovakia"              
##  [37] "Bahrain"                "Malaysia"               "Nicaragua"             
##  [40] "Ecuador"                "El Salvador"            "Poland"                
##  [43] "Uzbekistan"             "Italy"                  "Russia"                
##  [46] "Japan"                  "Lithuania"              "Algeria"               
##  [49] "Latvia"                 "Moldova"                "Romania"               
##  [52] "Bolivia"                "Turkmenistan"           "Kazakhstan"            
##  [55] "Slovenia"               "Peru"                   "Mauritius"             
##  [58] "Cyprus"                 "Estonia"                "Belarus"               
##  [61] "Libya"                  "Turkey"                 "Paraguay"              
##  [64] "Philippines"            "Serbia"                 "Jordan"                
##  [67] "Hungary"                "Jamaica"                "Croatia"               
##  [70] "Kosovo"                 "China"                  "Pakistan"              
##  [73] "Indonesia"              "Venezuela"              "Montenegro"            
##  [76] "Morocco"                "Azerbaijan"             "Dominican Republic"    
##  [79] "Greece"                 "Lebanon"                "Portugal"              
##  [82] "Bosnia and Herzegovina" "Honduras"               "Nigeria"               
##  [85] "Vietnam"                "Tajikistan"             "Kyrgyzstan"            
##  [88] "Nepal"                  "Mongolia"               "South Africa"          
##  [91] "Tunisia"                "Egypt"                  "Bulgaria"              
##  [94] "Sierra Leone"           "Cameroon"               "Iran"                  
##  [97] "Albania"                "Bangladesh"             "Kenya"                 
## [100] "Myanmar"                "Senegal"                "Zambia"                
## [103] "Iraq"                   "Gabon"                  "Ethiopia"              
## [106] "Sri Lanka"              "Armenia"                "India"                 
## [109] "Mauritania"             "Georgia"                "Mali"                  
## [112] "Cambodia"               "Ghana"                  "Ukraine"               
## [115] "Uganda"                 "Burkina Faso"           "Niger"                 
## [118] "Malawi"                 "Chad"                   "Zimbabwe"              
## [121] "Afghanistan"            "Botswana"               "Benin"                 
## [124] "Madagascar"             "Haiti"                  "Yemen"                 
## [127] "Liberia"                "Guinea"                 "Togo"                  
## [130] "Rwanda"                 "Tanzania"               "Burundi"               
## [133] "Switzerland"            "Iceland"                "Denmark"               
## [136] "Norway"                 "Canada"                 "Finland"               
## [139] "Netherlands"            "Sweden"                 "New Zealand"           
## [142] "Australia"              "Israel"                 "Costa Rica"            
## [145] "Austria"                "Mexico"                 "United States"         
## [148] "Brazil"                 "Luxembourg"             "Ireland"               
## [151] "Belgium"                "United Arab Emirates"   "United Kingdom"        
## [154] "Venezuela"              "Singapore"              "Panama"                
## [157] "Germany"                "Chile"                  "France"                
## [160] "Argentina"              "Uruguay"                "Colombia"              
## [163] "Thailand"               "Saudi Arabia"           "Spain"                 
## [166] "Malta"                  "Kuwait"                 "El Salvador"           
## [169] "Guatemala"              "Uzbekistan"             "Slovakia"              
## [172] "Japan"                  "Ecuador"                "Bahrain"               
## [175] "Italy"                  "Bolivia"                "Moldova"               
## [178] "Paraguay"               "Kazakhstan"             "Slovenia"              
## [181] "Lithuania"              "Nicaragua"              "Peru"                  
## [184] "Belarus"                "Poland"                 "Malaysia"              
## [187] "Croatia"                "Libya"                  "Russia"                
## [190] "Jamaica"                "Cyprus"                 "Algeria"               
## [193] "Kosovo"                 "Turkmenistan"           "Mauritius"             
## [196] "Estonia"                "Indonesia"              "Vietnam"               
## [199] "Turkey"                 "Kyrgyzstan"             "Nigeria"               
## [202] "Azerbaijan"             "Pakistan"               "Jordan"                
## [205] "Montenegro"             "China"                  "Zambia"                
## [208] "Romania"                "Serbia"                 "Portugal"              
## [211] "Latvia"                 "Philippines"            "Morocco"               
## [214] "Albania"                "Bosnia and Herzegovina" "Dominican Republic"    
## [217] "Mongolia"               "Greece"                 "Lebanon"               
## [220] "Hungary"                "Honduras"               "Tajikistan"            
## [223] "Tunisia"                "Bangladesh"             "Iran"                  
## [226] "Ukraine"                "Iraq"                   "South Africa"          
## [229] "Ghana"                  "Zimbabwe"               "Liberia"               
## [232] "India"                  "Haiti"                  "Nepal"                 
## [235] "Ethiopia"               "Sierra Leone"           "Mauritania"            
## [238] "Kenya"                  "Armenia"                "Botswana"              
## [241] "Myanmar"                "Georgia"                "Malawi"                
## [244] "Sri Lanka"              "Cameroon"               "Bulgaria"              
## [247] "Egypt"                  "Yemen"                  "Mali"                  
## [250] "Uganda"                 "Senegal"                "Gabon"                 
## [253] "Niger"                  "Cambodia"               "Tanzania"              
## [256] "Madagascar"             "Chad"                   "Guinea"                
## [259] "Burkina Faso"           "Afghanistan"            "Rwanda"                
## [262] "Benin"                  "Burundi"                "Togo"                  
## [265] "Finland"                "Denmark"                "Norway"                
## [268] "Iceland"                "Netherlands"            "Switzerland"           
## [271] "Sweden"                 "New Zealand"            "Canada"                
## [274] "Austria"                "Australia"              "Costa Rica"            
## [277] "Israel"                 "Luxembourg"             "United Kingdom"        
## [280] "Ireland"                "Germany"                "Belgium"               
## [283] "United States"          "United Arab Emirates"   "Malta"                 
## [286] "Mexico"                 "France"                 "Chile"                 
## [289] "Guatemala"              "Saudi Arabia"           "Spain"                 
## [292] "Panama"                 "Brazil"                 "Uruguay"               
## [295] "Singapore"              "El Salvador"            "Italy"                 
## [298] "Bahrain"                "Slovakia"               "Poland"                
## [301] "Uzbekistan"             "Lithuania"              "Colombia"              
## [304] "Slovenia"               "Nicaragua"              "Kosovo"                
## [307] "Argentina"              "Romania"                "Cyprus"                
## [310] "Ecuador"                "Kuwait"                 "Thailand"              
## [313] "Latvia"                 "Estonia"                "Jamaica"               
## [316] "Mauritius"              "Japan"                  "Honduras"              
## [319] "Kazakhstan"             "Bolivia"                "Hungary"               
## [322] "Paraguay"               "Peru"                   "Portugal"              
## [325] "Pakistan"               "Russia"                 "Philippines"           
## [328] "Serbia"                 "Moldova"                "Libya"                 
## [331] "Montenegro"             "Tajikistan"             "Croatia"               
## [334] "Dominican Republic"     "Bosnia and Herzegovina" "Turkey"                
## [337] "Malaysia"               "Belarus"                "Greece"                
## [340] "Mongolia"               "Nigeria"                "Kyrgyzstan"            
## [343] "Turkmenistan"           "Algeria"                "Morocco"               
## [346] "Azerbaijan"             "Lebanon"                "Indonesia"             
## [349] "China"                  "Vietnam"                "Cameroon"              
## [352] "Bulgaria"               "Ghana"                  "Nepal"                 
## [355] "Jordan"                 "Benin"                  "Gabon"                 
## [358] "South Africa"           "Albania"                "Venezuela"             
## [361] "Cambodia"               "Senegal"                "Niger"                 
## [364] "Burkina Faso"           "Armenia"                "Iran"                  
## [367] "Guinea"                 "Georgia"                "Kenya"                 
## [370] "Mauritania"             "Tunisia"                "Bangladesh"            
## [373] "Iraq"                   "Mali"                   "Sierra Leone"          
## [376] "Sri Lanka"              "Myanmar"                "Chad"                  
## [379] "Ukraine"                "Ethiopia"               "Uganda"                
## [382] "Egypt"                  "Zambia"                 "Togo"                  
## [385] "India"                  "Liberia"                "Madagascar"            
## [388] "Burundi"                "Zimbabwe"               "Haiti"                 
## [391] "Botswana"               "Malawi"                 "Yemen"                 
## [394] "Rwanda"                 "Tanzania"               "Afghanistan"           
## [397] "Finland"                "Norway"                 "Denmark"               
## [400] "Iceland"                "Switzerland"            "Netherlands"           
## [403] "Canada"                 "New Zealand"            "Sweden"                
## [406] "Australia"              "United Kingdom"         "Austria"               
## [409] "Costa Rica"             "Ireland"                "Germany"               
## [412] "Belgium"                "Luxembourg"             "United States"         
## [415] "Israel"                 "United Arab Emirates"   "Malta"                 
## [418] "France"                 "Mexico"                 "Chile"                 
## [421] "Panama"                 "Brazil"                 "Argentina"             
## [424] "Guatemala"              "Uruguay"                "Saudi Arabia"          
## [427] "Singapore"              "Malaysia"               "Spain"                 
## [430] "Colombia"               "Slovakia"               "El Salvador"           
## [433] "Nicaragua"              "Poland"                 "Bahrain"               
## [436] "Uzbekistan"             "Kuwait"                 "Thailand"              
## [439] "Italy"                  "Ecuador"                "Lithuania"             
## [442] "Slovenia"               "Romania"                "Latvia"                
## [445] "Japan"                  "Mauritius"              "Jamaica"               
## [448] "Russia"                 "Kazakhstan"             "Cyprus"                
## [451] "Bolivia"                "Estonia"                "Paraguay"              
## [454] "Peru"                   "Kosovo"                 "Moldova"               
## [457] "Turkmenistan"           "Hungary"                "Libya"                 
## [460] "Philippines"            "Honduras"               "Belarus"               
## [463] "Turkey"                 "Pakistan"               "Portugal"              
## [466] "Serbia"                 "Lebanon"                "Greece"                
## [469] "Montenegro"             "Croatia"                "Dominican Republic"    
## [472] "Algeria"                "Morocco"                "China"                 
## [475] "Azerbaijan"             "Tajikistan"             "Jordan"                
## [478] "Nigeria"                "Kyrgyzstan"             "Bosnia and Herzegovina"
## [481] "Mongolia"               "Vietnam"                "Indonesia"             
## [484] "Cameroon"               "Bulgaria"               "Nepal"                 
## [487] "Venezuela"              "Gabon"                  "South Africa"          
## [490] "Iran"                   "Ghana"                  "Senegal"               
## [493] "Tunisia"                "Albania"                "Sierra Leone"          
## [496] "Bangladesh"             "Sri Lanka"              "Iraq"                  
## [499] "Mali"                   "Cambodia"               "Burkina Faso"          
## [502] "Egypt"                  "Kenya"                  "Zambia"                
## [505] "Mauritania"             "Ethiopia"               "Georgia"               
## [508] "Armenia"                "Myanmar"                "Chad"                  
## [511] "India"                  "Niger"                  "Uganda"                
## [514] "Benin"                  "Ukraine"                "Togo"                  
## [517] "Guinea"                 "Madagascar"             "Zimbabwe"              
## [520] "Afghanistan"            "Botswana"               "Malawi"                
## [523] "Haiti"                  "Liberia"                "Rwanda"                
## [526] "Yemen"                  "Tanzania"               "Burundi"               
## [529] "Denmark"                "Switzerland"            "Iceland"               
## [532] "Norway"                 "Finland"                "Canada"                
## [535] "Netherlands"            "New Zealand"            "Australia"             
## [538] "Sweden"                 "Israel"                 "Austria"               
## [541] "United States"          "Costa Rica"             "Germany"               
## [544] "Brazil"                 "Belgium"                "Ireland"               
## [547] "Luxembourg"             "Mexico"                 "Singapore"             
## [550] "United Kingdom"         "Chile"                  "Panama"                
## [553] "Argentina"              "United Arab Emirates"   "Uruguay"               
## [556] "Malta"                  "Colombia"               "France"                
## [559] "Thailand"               "Saudi Arabia"           "Spain"                 
## [562] "Algeria"                "Guatemala"              "Kuwait"                
## [565] "Bahrain"                "Venezuela"              "Slovakia"              
## [568] "El Salvador"            "Malaysia"               "Nicaragua"             
## [571] "Uzbekistan"             "Italy"                  "Ecuador"               
## [574] "Japan"                  "Kazakhstan"             "Moldova"               
## [577] "Russia"                 "Poland"                 "Bolivia"               
## [580] "Lithuania"              "Belarus"                "Slovenia"              
## [583] "Peru"                   "Turkmenistan"           "Mauritius"             
## [586] "Libya"                  "Latvia"                 "Cyprus"                
## [589] "Paraguay"               "Romania"                "Estonia"               
## [592] "Jamaica"                "Croatia"                "Kosovo"                
## [595] "Turkey"                 "Indonesia"              "Jordan"                
## [598] "Azerbaijan"             "Philippines"            "China"                 
## [601] "Kyrgyzstan"             "Serbia"                 "Bosnia and Herzegovina"
## [604] "Montenegro"             "Dominican Republic"     "Morocco"               
## [607] "Hungary"                "Pakistan"               "Lebanon"               
## [610] "Portugal"               "Vietnam"                "Tunisia"               
## [613] "Greece"                 "Tajikistan"             "Mongolia"              
## [616] "Nigeria"                "Honduras"               "Iran"                  
## [619] "Zambia"                 "Nepal"                  "Albania"               
## [622] "Bangladesh"             "Sierra Leone"           "Iraq"                  
## [625] "Cameroon"               "Ethiopia"               "South Africa"          
## [628] "Sri Lanka"              "India"                  "Myanmar"               
## [631] "Egypt"                  "Armenia"                "Kenya"                 
## [634] "Ukraine"                "Ghana"                  "Georgia"               
## [637] "Senegal"                "Bulgaria"               "Mauritania"            
## [640] "Zimbabwe"               "Malawi"                 "Gabon"                 
## [643] "Mali"                   "Haiti"                  "Botswana"              
## [646] "Cambodia"               "Niger"                  "Chad"                  
## [649] "Burkina Faso"           "Uganda"                 "Yemen"                 
## [652] "Madagascar"             "Tanzania"               "Liberia"               
## [655] "Guinea"                 "Rwanda"                 "Benin"                 
## [658] "Afghanistan"            "Togo"                   "Burundi"               
## [661] "Finland"                "Denmark"                "Switzerland"           
## [664] "Iceland"                "Norway"                 "Netherlands"           
## [667] "Sweden"                 "New Zealand"            "Austria"               
## [670] "Luxembourg"             "Canada"                 "Australia"             
## [673] "United Kingdom"         "Israel"                 "Costa Rica"            
## [676] "Ireland"                "Germany"                "United States"         
## [679] "Belgium"                "United Arab Emirates"   "Malta"                 
## [682] "France"                 "Mexico"                 "Uruguay"               
## [685] "Saudi Arabia"           "Spain"                  "Guatemala"             
## [688] "Italy"                  "Singapore"              "Brazil"                
## [691] "Slovenia"               "El Salvador"            "Kosovo"                
## [694] "Panama"                 "Slovakia"               "Uzbekistan"            
## [697] "Chile"                  "Bahrain"                "Lithuania"             
## [700] "Poland"                 "Colombia"               "Cyprus"                
## [703] "Nicaragua"              "Romania"                "Kuwait"                
## [706] "Mauritius"              "Kazakhstan"             "Estonia"               
## [709] "Philippines"            "Hungary"                "Thailand"              
## [712] "Argentina"              "Honduras"               "Latvia"                
## [715] "Ecuador"                "Portugal"               "Jamaica"               
## [718] "Japan"                  "Peru"                   "Serbia"                
## [721] "Bolivia"                "Pakistan"               "Paraguay"              
## [724] "Dominican Republic"     "Bosnia and Herzegovina" "Moldova"               
## [727] "Tajikistan"             "Montenegro"             "Russia"                
## [730] "Kyrgyzstan"             "Belarus"                "Greece"                
## [733] "Croatia"                "Libya"                  "Mongolia"              
## [736] "Malaysia"               "Vietnam"                "Indonesia"             
## [739] "Benin"                  "Azerbaijan"             "Ghana"                 
## [742] "Nepal"                  "Turkey"                 "China"                 
## [745] "Turkmenistan"           "Bulgaria"               "Morocco"               
## [748] "Cameroon"               "Venezuela"              "Algeria"               
## [751] "Senegal"                "Guinea"                 "Niger"                 
## [754] "Albania"                "Cambodia"               "Bangladesh"            
## [757] "Gabon"                  "South Africa"           "Iraq"                  
## [760] "Lebanon"                "Burkina Faso"           "Mali"                  
## [763] "Nigeria"                "Armenia"                "Georgia"               
## [766] "Iran"                   "Jordan"                 "Kenya"                 
## [769] "Ukraine"                "Liberia"                "Uganda"                
## [772] "Chad"                   "Tunisia"                "Mauritania"            
## [775] "Sri Lanka"              "Myanmar"                "Togo"                  
## [778] "Ethiopia"               "Madagascar"             "Egypt"                 
## [781] "Sierra Leone"           "Burundi"                "Zambia"                
## [784] "Haiti"                  "India"                  "Malawi"                
## [787] "Yemen"                  "Botswana"               "Tanzania"              
## [790] "Rwanda"                 "Zimbabwe"               "Afghanistan"
## # A tibble: 132 × 2
##    country       sum
##    <chr>       <int>
##  1 Afghanistan     6
##  2 Albania         6
##  3 Algeria         6
##  4 Argentina       6
##  5 Armenia         6
##  6 Australia       6
##  7 Austria         6
##  8 Azerbaijan      6
##  9 Bahrain         6
## 10 Bangladesh      6
## # ℹ 122 more rows
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `summarise()` has grouped output by 'continent', 'country'. You can override
## using the `.groups` argument.
## # A tibble: 132 × 2
## # Groups:   continent, country [132]
##    continent country     
##    <chr>     <chr>       
##  1 Africa    Algeria     
##  2 Africa    Benin       
##  3 Africa    Botswana    
##  4 Africa    Burkina Faso
##  5 Africa    Burundi     
##  6 Africa    Cameroon    
##  7 Africa    Chad        
##  8 Africa    Egypt       
##  9 Africa    Ethiopia    
## 10 Africa    Gabon       
## # ℹ 122 more rows
## # A tibble: 132 × 3
##    country     mean_happiness_score gdp_per_capita
##    <chr>                      <dbl>          <dbl>
##  1 Finland                     7.58           1.34
##  2 Denmark                     7.56           1.39
##  3 Norway                      7.53           1.50
##  4 Switzerland                 7.52           1.46
##  5 Iceland                     7.51           1.38
##  6 Netherlands                 7.41           1.40
##  7 Canada                      7.33           1.37
##  8 Sweden                      7.32           1.39
##  9 New Zealand                 7.31           1.30
## 10 Australia                   7.27           1.38
## # ℹ 122 more rows

# One row per country: the average happiness score and the average
# gdp_per_capita value across that country's rows, ordered from the highest
# mean happiness score to the lowest.
mean_gdp <- hap_data %>%
  group_by(country) %>%
  reframe(
    mean_happiness_score = mean(happiness_score),
    gdp_per_capita = mean(gdp_per_capita)
  ) %>%
  arrange(desc(mean_happiness_score))

# Show the aggregated table.
mean_gdp
## # A tibble: 132 × 3
##    country     mean_happiness_score gdp_per_capita
##    <chr>                      <dbl>          <dbl>
##  1 Finland                     7.58           1.34
##  2 Denmark                     7.56           1.39
##  3 Norway                      7.53           1.50
##  4 Switzerland                 7.52           1.46
##  5 Iceland                     7.51           1.38
##  6 Netherlands                 7.41           1.40
##  7 Canada                      7.33           1.37
##  8 Sweden                      7.32           1.39
##  9 New Zealand                 7.31           1.30
## 10 Australia                   7.27           1.38
## # ℹ 122 more rows
# Simple linear regression: mean happiness score explained by the mean
# gdp_per_capita value, one observation per country.
fit_mean_gdp <- lm(
  formula = mean_happiness_score ~ gdp_per_capita,
  data = mean_gdp
)
# Coefficients, residual summary and goodness-of-fit statistics.
summary(fit_mean_gdp)
## 
## Call:
## lm(formula = mean_happiness_score ~ gdp_per_capita, data = mean_gdp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.97429 -0.38927 -0.07531  0.49892  1.42150 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      3.2722     0.1448   22.59   <2e-16 ***
## gdp_per_capita   2.3687     0.1442   16.42   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6307 on 130 degrees of freedom
## Multiple R-squared:  0.6748, Adjusted R-squared:  0.6723 
## F-statistic: 269.7 on 1 and 130 DF,  p-value: < 2.2e-16
# Coefficient table of the fitted model (rows: terms, columns: statistics).
# The estimates are looked up by row/column name instead of by position.
mean_gdp_coef <- coef(summary(fit_mean_gdp))
mean_gdp_coef["(Intercept)", "Estimate"] # Intercept
## [1] 3.272233
mean_gdp_coef["gdp_per_capita", "Estimate"] # Slope
## [1] 2.368744
# Scatter plot of each country's mean happiness score against its mean
# gdp_per_capita value, with country labels and the fitted regression line.
# The x/y mapping is declared once in ggplot() and inherited by every layer.
# NOTE(review): the unit stated in the x-axis label is not established by
# anything visible in this file -- confirm against the data dictionary.
ggplot(mean_gdp, aes(x = gdp_per_capita, y = mean_happiness_score)) +
  # Repelled country labels; labels with too many overlaps are not drawn.
  geom_text_repel(
    aes(label = country),
    color = "#ee82ee",
    size = 2,
    max.overlaps = 17
  ) +
  geom_point(color = "#ee82ee") +
  # Regression line drawn from the intercept and slope of the fitted model.
  geom_abline(
    intercept = mean_gdp_coef[1, 1],
    slope = mean_gdp_coef[2, 1],
    color = "white"
  ) +
  labs(
    x = "Mean GDP Per Capita (Hundreds of Thousands of Dollars)",
    y = "Mean Happiness Score 2015-2020"
  ) +
  ggtitle("Mean Happiness Score vs Mean GDP Per Capita") +
  # Fixed axis ranges with no padding around them.
  scale_x_continuous(
    breaks = seq(0, 2, by = 0.4),
    minor_breaks = seq(0, 2, by = 0.1),
    limits = c(0, 2),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    breaks = seq(2, 8, by = 1),
    minor_breaks = seq(2, 8, by = 0.5),
    limits = c(2, 8),
    expand = c(0, 0)
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    panel.grid.major = element_line(color = "beige", linewidth = 0.5),
    panel.grid.minor = element_line(color = "beige", linewidth = 0.25)
  )
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

 # geom_text_repel(aes(x = payroll, y = win, label=team), size=3)
# One row per country: the average happiness score and the average health
# value across that country's rows, ordered from the highest mean happiness
# score to the lowest.
mean_health <- hap_data %>%
  group_by(country) %>%
  reframe(
    mean_happiness_score = mean(happiness_score),
    health = mean(health)
  ) %>%
  arrange(desc(mean_happiness_score))

# Show the aggregated table.
mean_health
## # A tibble: 132 × 3
##    country     mean_happiness_score health
##    <chr>                      <dbl>  <dbl>
##  1 Finland                     7.58  0.888
##  2 Denmark                     7.56  0.884
##  3 Norway                      7.53  0.896
##  4 Switzerland                 7.52  0.947
##  5 Iceland                     7.51  0.932
##  6 Netherlands                 7.41  0.895
##  7 Canada                      7.33  0.921
##  8 Sweden                      7.32  0.914
##  9 New Zealand                 7.31  0.911
## 10 Australia                   7.27  0.933
## # ℹ 122 more rows
# Simple linear regression: mean happiness score explained by the mean
# health value, one observation per country.
fit_mean_health <- lm(
  formula = mean_happiness_score ~ health,
  data = mean_health
)
# Coefficients, residual summary and goodness-of-fit statistics.
summary(fit_mean_health)
## 
## Call:
## lm(formula = mean_happiness_score ~ health, data = mean_health)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.32965 -0.44722  0.00345  0.48446  1.70080 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.8723     0.1780   16.14   <2e-16 ***
## health        4.0097     0.2598   15.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6573 on 130 degrees of freedom
## Multiple R-squared:  0.6468, Adjusted R-squared:  0.6441 
## F-statistic: 238.1 on 1 and 130 DF,  p-value: < 2.2e-16
# Coefficient matrix of the fit; the "Estimate" column holds the fitted
# intercept and slope that the regression line in the plot is drawn from.
mean_health_coef <- coef(summary(fit_mean_health))
mean_health_coef["(Intercept)", "Estimate"] # Intercept
## [1] 2.872269
mean_health_coef["health", "Estimate"] # Slope
## [1] 4.009675
# Labelled scatter plot of per-country mean happiness against mean health,
# overlaid with the fitted regression line from mean_health_coef.

# Upper x limit: at least 1, otherwise the largest mean health value rounded up
# to the next 0.2 break. A fixed limit of 1 makes ggplot2 discard any country
# whose mean lies above it (reported as "Removed 1 rows" warnings).
health_x_max <- max(1, ceiling(max(mean_health$health, na.rm = TRUE) / 0.2) * 0.2)

ggplot(mean_health, aes(x = health, y = mean_happiness_score)) +
  # Layers inherit x and y from the plot-level mapping.
  geom_text_repel(aes(label = country), color = "#ee82ee", size = 2, max.overlaps = 17) +
  geom_point(color = "#ee82ee") +
  geom_abline(
    intercept = mean_health_coef[1, 1],
    slope = mean_health_coef[2, 1],
    color = "white"
  ) +
  labs(
    title = "Mean Happiness Score vs Mean Health",
    x = "Mean Health Coefficient",
    y = "Mean Happiness Score 2015-2020"
  ) +
  scale_x_continuous(
    breaks = seq(0, health_x_max, by = 0.2),
    minor_breaks = seq(0, health_x_max, by = 0.2),
    limits = c(0, health_x_max),
    expand = c(0, 0) # no padding around the axis range
  ) +
  scale_y_continuous(
    breaks = seq(2, 8, by = 1),
    minor_breaks = seq(2, 8, by = 0.5),
    limits = c(2, 8),
    expand = c(0, 0) # no padding around the axis range
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4),
    # `linewidth` replaces the `size` argument deprecated in ggplot2 3.4.0.
    panel.grid.major = element_line(color = "beige", linewidth = 0.5),
    panel.grid.minor = element_line(color = "beige", linewidth = 0.25)
  )
## Warning: Removed 1 rows containing missing values (`geom_text_repel()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

 # geom_text_repel(aes(x = payroll, y = win, label=team), size=3)
# One row per country: the average happiness score and the average freedom
# indicator over all of that country's rows, happiest country first.
mean_freedom <- hap_data %>%
  group_by(country) %>%
  reframe(
    mean_happiness_score = mean(happiness_score),
    freedom = mean(freedom)
  ) %>%
  arrange(desc(mean_happiness_score))

# Echo the aggregated table.
mean_freedom
## # A tibble: 132 × 3
##    country     mean_happiness_score freedom
##    <chr>                      <dbl>   <dbl>
##  1 Finland                     7.58   0.628
##  2 Denmark                     7.56   0.632
##  3 Norway                      7.53   0.643
##  4 Switzerland                 7.52   0.622
##  5 Iceland                     7.51   0.625
##  6 Netherlands                 7.41   0.594
##  7 Canada                      7.33   0.616
##  8 Sweden                      7.32   0.623
##  9 New Zealand                 7.31   0.623
## 10 Australia                   7.27   0.608
## # ℹ 122 more rows
# Simple linear regression: mean happiness score explained by mean freedom.
fit_mean_freedom <- lm(
  formula = mean_happiness_score ~ freedom,
  data = mean_freedom
)
summary(fit_mean_freedom) # coefficient table and goodness-of-fit statistics
## 
## Call:
## lm(formula = mean_happiness_score ~ freedom, data = mean_freedom)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.79808 -0.62992  0.07073  0.77500  1.70874 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.4714     0.2558  13.571  < 2e-16 ***
## freedom       4.6881     0.5704   8.218 1.82e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8972 on 130 degrees of freedom
## Multiple R-squared:  0.3419, Adjusted R-squared:  0.3368 
## F-statistic: 67.54 on 1 and 130 DF,  p-value: 1.818e-13
# Coefficient matrix of the fit; the "Estimate" column holds the fitted
# intercept and slope that the regression line in the plot is drawn from.
mean_freedom_coef <- coef(summary(fit_mean_freedom))
mean_freedom_coef["(Intercept)", "Estimate"] # Intercept
## [1] 3.471414
mean_freedom_coef["freedom", "Estimate"] # Slope
## [1] 4.688101
# Labelled scatter plot of per-country mean happiness against mean freedom,
# overlaid with the fitted regression line from mean_freedom_coef.
ggplot(mean_freedom, aes(x = freedom, y = mean_happiness_score)) +
  # Layers inherit x and y from the plot-level mapping.
  geom_text_repel(aes(label = country), color = "#ee82ee", size = 2, max.overlaps = 17) +
  geom_point(color = "#ee82ee") +
  geom_abline(
    intercept = mean_freedom_coef[1, 1],
    slope = mean_freedom_coef[2, 1],
    color = "white"
  ) +
  labs(
    title = "Mean Happiness Score vs Mean Freedom",
    x = "Mean Freedom Coefficient",
    y = "Mean Happiness Score 2015-2020"
  ) +
  scale_x_continuous(
    breaks = seq(0, 1, by = 0.2),
    minor_breaks = seq(0, 1, by = 0.2),
    limits = c(0, 1),
    expand = c(0, 0) # no padding around the axis range
  ) +
  scale_y_continuous(
    breaks = seq(2, 8, by = 1),
    minor_breaks = seq(2, 8, by = 0.5),
    limits = c(2, 8),
    expand = c(0, 0) # no padding around the axis range
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4),
    # `linewidth` replaces the `size` argument deprecated in ggplot2 3.4.0.
    panel.grid.major = element_line(color = "beige", linewidth = 0.5),
    panel.grid.minor = element_line(color = "beige", linewidth = 0.25)
  )

 # geom_text_repel(aes(x = payroll, y = win, label=team), size=3)
# One row per country: the average happiness score and the average generosity
# indicator over all of that country's rows, happiest country first.
mean_generosity <- hap_data %>%
  group_by(country) %>%
  reframe(
    mean_happiness_score = mean(happiness_score),
    generosity = mean(generosity)
  ) %>%
  arrange(desc(mean_happiness_score))

# Echo the aggregated table.
mean_generosity
## # A tibble: 132 × 3
##    country     mean_happiness_score generosity
##    <chr>                      <dbl>      <dbl>
##  1 Finland                     7.58      0.208
##  2 Denmark                     7.56      0.306
##  3 Norway                      7.53      0.322
##  4 Switzerland                 7.52      0.276
##  5 Iceland                     7.51      0.410
##  6 Netherlands                 7.41      0.402
##  7 Canada                      7.33      0.372
##  8 Sweden                      7.32      0.326
##  9 New Zealand                 7.31      0.415
## 10 Australia                   7.27      0.401
## # ℹ 122 more rows
# Simple linear regression: mean happiness score explained by mean generosity.
fit_mean_generosity <- lm(
  formula = mean_happiness_score ~ generosity,
  data = mean_generosity
)
summary(fit_mean_generosity) # coefficient table and goodness-of-fit statistics
## 
## Call:
## lm(formula = mean_happiness_score ~ generosity, data = mean_generosity)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2334 -0.8421  0.1184  0.8248  2.1170 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.1108     0.2009  25.444   <2e-16 ***
## generosity    1.7066     0.8337   2.047   0.0427 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.089 on 130 degrees of freedom
## Multiple R-squared:  0.03123,    Adjusted R-squared:  0.02378 
## F-statistic:  4.19 on 1 and 130 DF,  p-value: 0.04267
# Coefficient matrix of the fit; the "Estimate" column holds the fitted
# intercept and slope that the regression line in the plot is drawn from.
mean_generosity_coef <- coef(summary(fit_mean_generosity))
mean_generosity_coef["(Intercept)", "Estimate"] # Intercept
## [1] 5.11075
mean_generosity_coef["generosity", "Estimate"] # Slope
## [1] 1.706647
# Labelled scatter plot of per-country mean happiness against mean generosity,
# overlaid with the fitted regression line from mean_generosity_coef.
ggplot(mean_generosity, aes(x = generosity, y = mean_happiness_score)) +
  # Layers inherit x and y from the plot-level mapping.
  geom_text_repel(aes(label = country), color = "#ee82ee", size = 2, max.overlaps = 17) +
  geom_point(color = "#ee82ee") +
  geom_abline(
    intercept = mean_generosity_coef[1, 1],
    slope = mean_generosity_coef[2, 1],
    color = "white"
  ) +
  labs(
    title = "Mean Happiness Score vs Mean Generosity",
    x = "Mean Generosity Coefficient",
    y = "Mean Happiness Score 2015-2020"
  ) +
  scale_x_continuous(
    breaks = seq(0, 1, by = 0.2),
    minor_breaks = seq(0, 1, by = 0.2),
    limits = c(0, 1),
    expand = c(0, 0) # no padding around the axis range
  ) +
  scale_y_continuous(
    breaks = seq(2, 8, by = 1),
    minor_breaks = seq(2, 8, by = 0.5),
    limits = c(2, 8),
    expand = c(0, 0) # no padding around the axis range
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4),
    # `linewidth` replaces the `size` argument deprecated in ggplot2 3.4.0.
    panel.grid.major = element_line(color = "beige", linewidth = 0.5),
    panel.grid.minor = element_line(color = "beige", linewidth = 0.25)
  )

 # geom_text_repel(aes(x = payroll, y = win, label=team), size=3)
# One row per country: the average happiness score and the average
# government-trust indicator over all of that country's rows, happiest first.
mean_gov_trust <- hap_data %>%
  group_by(country) %>%
  reframe(
    mean_happiness_score = mean(happiness_score),
    gov_trust = mean(gov_trust)
  ) %>%
  arrange(desc(mean_happiness_score))

# Echo the aggregated table.
mean_gov_trust
## # A tibble: 132 × 3
##    country     mean_happiness_score gov_trust
##    <chr>                      <dbl>     <dbl>
##  1 Finland                     7.58     0.412
##  2 Denmark                     7.56     0.440
##  3 Norway                      7.53     0.359
##  4 Switzerland                 7.52     0.384
##  5 Iceland                     7.51     0.141
##  6 Netherlands                 7.41     0.310
##  7 Canada                      7.33     0.313
##  8 Sweden                      7.32     0.405
##  9 New Zealand                 7.31     0.410
## 10 Australia                   7.27     0.318
## # ℹ 122 more rows
# Simple linear regression: mean happiness score explained by mean
# government trust.
fit_mean_gov_trust <- lm(
  formula = mean_happiness_score ~ gov_trust,
  data = mean_gov_trust
)
summary(fit_mean_gov_trust) # coefficient table and goodness-of-fit statistics
## 
## Call:
## lm(formula = mean_happiness_score ~ gov_trust, data = mean_gov_trust)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8063 -0.7154  0.1606  0.7288  1.9607 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.8441     0.1321  36.672  < 2e-16 ***
## gov_trust     5.0049     0.8071   6.201 6.92e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9716 on 130 degrees of freedom
## Multiple R-squared:  0.2283, Adjusted R-squared:  0.2223 
## F-statistic: 38.45 on 1 and 130 DF,  p-value: 6.922e-09
# Coefficient matrix of the fit; the "Estimate" column holds the fitted
# intercept and slope that the regression line in the plot is drawn from.
mean_gov_trust_coef <- coef(summary(fit_mean_gov_trust))
mean_gov_trust_coef["(Intercept)", "Estimate"] # Intercept
## [1] 4.844098
mean_gov_trust_coef["gov_trust", "Estimate"] # Slope
## [1] 5.004884
# Labelled scatter plot of per-country mean happiness against mean government
# trust, overlaid with the fitted regression line from mean_gov_trust_coef.
ggplot(mean_gov_trust, aes(x = gov_trust, y = mean_happiness_score)) +
  # Layers inherit x and y from the plot-level mapping.
  geom_text_repel(aes(label = country), color = "#ee82ee", size = 2, max.overlaps = 17) +
  geom_point(color = "#ee82ee") +
  geom_abline(
    intercept = mean_gov_trust_coef[1, 1],
    slope = mean_gov_trust_coef[2, 1],
    color = "white"
  ) +
  labs(
    title = "Mean Happiness Score vs Mean Government Trust",
    x = "Mean Government Trust Coefficient",
    y = "Mean Happiness Score 2015-2020"
  ) +
  scale_x_continuous(
    breaks = seq(0, 1, by = 0.2),
    minor_breaks = seq(0, 1, by = 0.2),
    limits = c(0, 1),
    expand = c(0, 0) # no padding around the axis range
  ) +
  scale_y_continuous(
    breaks = seq(2, 8, by = 1),
    minor_breaks = seq(2, 8, by = 0.5),
    limits = c(2, 8),
    expand = c(0, 0) # no padding around the axis range
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4),
    # `linewidth` replaces the `size` argument deprecated in ggplot2 3.4.0.
    panel.grid.major = element_line(color = "beige", linewidth = 0.5),
    panel.grid.minor = element_line(color = "beige", linewidth = 0.25)
  )
## Warning: ggrepel: 27 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

 # geom_text_repel(aes(x = payroll, y = win, label=team), size=3)
# One row per country: the average happiness score and the average
# social-support indicator over all of that country's rows, happiest first.
mean_social_support <- hap_data %>%
  group_by(country) %>%
  reframe(
    mean_happiness_score = mean(happiness_score),
    social_support = mean(social_support)
  ) %>%
  arrange(desc(mean_happiness_score))

# Echo the aggregated table.
mean_social_support
## # A tibble: 132 × 3
##    country     mean_happiness_score social_support
##    <chr>                      <dbl>          <dbl>
##  1 Finland                     7.58          0.780
##  2 Denmark                     7.56          0.778
##  3 Norway                      7.53          0.777
##  4 Switzerland                 7.52          0.758
##  5 Iceland                     7.51          0.803
##  6 Netherlands                 7.41          0.746
##  7 Canada                      7.33          0.745
##  8 Sweden                      7.32          0.737
##  9 New Zealand                 7.31          0.774
## 10 Australia                   7.27          0.766
## # ℹ 122 more rows
# Simple linear regression: mean happiness score explained by mean
# social support.
fit_mean_social_support <- lm(
  formula = mean_happiness_score ~ social_support,
  data = mean_social_support
)
summary(fit_mean_social_support) # coefficient table and goodness-of-fit statistics
## 
## Call:
## lm(formula = mean_happiness_score ~ social_support, data = mean_social_support)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.65282 -0.47291 -0.00254  0.43672  1.61483 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.6163     0.2691   6.006 1.78e-08 ***
## social_support   6.3302     0.4309  14.690  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6781 on 130 degrees of freedom
## Multiple R-squared:  0.6241, Adjusted R-squared:  0.6212 
## F-statistic: 215.8 on 1 and 130 DF,  p-value: < 2.2e-16
# Coefficient matrix of the fit; the "Estimate" column holds the fitted
# intercept and slope that the regression line in the plot is drawn from.
mean_social_support_coef <- coef(summary(fit_mean_social_support))
mean_social_support_coef["(Intercept)", "Estimate"] # Intercept
## [1] 1.616323
mean_social_support_coef["social_support", "Estimate"] # Slope
## [1] 6.330171
# Labelled scatter plot of per-country mean happiness against mean social
# support, overlaid with the fitted regression line from
# mean_social_support_coef.
ggplot(mean_social_support, aes(x = social_support, y = mean_happiness_score)) +
  # Layers inherit x and y from the plot-level mapping.
  geom_text_repel(aes(label = country), color = "#ee82ee", size = 2, max.overlaps = 17) +
  geom_point(color = "#ee82ee") +
  geom_abline(
    intercept = mean_social_support_coef[1, 1],
    slope = mean_social_support_coef[2, 1],
    color = "white"
  ) +
  labs(
    title = "Mean Happiness Score vs Mean Support",
    x = "Mean Social Support Coefficient",
    y = "Mean Happiness Score 2015-2020"
  ) +
  scale_x_continuous(
    breaks = seq(0, 1, by = 0.2),
    minor_breaks = seq(0, 1, by = 0.2),
    limits = c(0, 1),
    expand = c(0, 0) # no padding around the axis range
  ) +
  scale_y_continuous(
    breaks = seq(2, 8, by = 1),
    minor_breaks = seq(2, 8, by = 0.5),
    limits = c(2, 8),
    expand = c(0, 0) # no padding around the axis range
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4),
    # `linewidth` replaces the `size` argument deprecated in ggplot2 3.4.0.
    panel.grid.major = element_line(color = "beige", linewidth = 0.5),
    panel.grid.minor = element_line(color = "beige", linewidth = 0.25)
  )
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

 # geom_text_repel(aes(x = payroll, y = win, label=team), size=3)
# One row per country: the average happiness score and the average CPI score
# over all of that country's rows, happiest country first.
mean_cpi_score <- hap_data %>%
  group_by(country) %>%
  reframe(
    mean_happiness_score = mean(happiness_score),
    cpi_score = mean(cpi_score)
  ) %>%
  arrange(desc(mean_happiness_score))

# Echo the aggregated table.
mean_cpi_score
## # A tibble: 132 × 3
##    country     mean_happiness_score cpi_score
##    <chr>                      <dbl>     <dbl>
##  1 Finland                     7.58      86.7
##  2 Denmark                     7.56      88.7
##  3 Norway                      7.53      85  
##  4 Switzerland                 7.52      85.3
##  5 Iceland                     7.51      77.2
##  6 Netherlands                 7.41      82.5
##  7 Canada                      7.33      80.3
##  8 Sweden                      7.32      86  
##  9 New Zealand                 7.31      88.7
## 10 Australia                   7.27      77.7
## # ℹ 122 more rows
# Simple linear regression: mean happiness score explained by mean CPI score.
fit_mean_cpi_score <- lm(
  formula = mean_happiness_score ~ cpi_score,
  data = mean_cpi_score
)
summary(fit_mean_cpi_score) # coefficient table and goodness-of-fit statistics
## 
## Call:
## lm(formula = mean_happiness_score ~ cpi_score, data = mean_cpi_score)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.45955 -0.59362  0.04873  0.43105  1.80012 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.684555   0.168154   21.91   <2e-16 ***
## cpi_score   0.040354   0.003475   11.61   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7749 on 130 degrees of freedom
## Multiple R-squared:  0.5092, Adjusted R-squared:  0.5054 
## F-statistic: 134.9 on 1 and 130 DF,  p-value: < 2.2e-16
# Coefficient matrix of the fit; the "Estimate" column holds the fitted
# intercept and slope that the regression line in the plot is drawn from.
mean_cpi_score_coef <- coef(summary(fit_mean_cpi_score))
mean_cpi_score_coef["(Intercept)", "Estimate"] # Intercept
## [1] 3.684555
mean_cpi_score_coef["cpi_score", "Estimate"] # Slope
## [1] 0.0403536
# Labelled scatter plot of per-country mean happiness against mean CPI score
# (x axis spans 0-100), overlaid with the fitted regression line from
# mean_cpi_score_coef.
ggplot(mean_cpi_score, aes(x = cpi_score, y = mean_happiness_score)) +
  # Layers inherit x and y from the plot-level mapping.
  geom_text_repel(aes(label = country), color = "#ee82ee", size = 2, max.overlaps = 17) +
  geom_point(color = "#ee82ee") +
  geom_abline(
    intercept = mean_cpi_score_coef[1, 1],
    slope = mean_cpi_score_coef[2, 1],
    color = "white"
  ) +
  labs(
    title = "Mean Happiness Score vs Mean CPI Score",
    x = "Mean CPI Score Coefficient",
    y = "Mean Happiness Score 2015-2020"
  ) +
  scale_x_continuous(
    breaks = seq(0, 100, by = 20),
    minor_breaks = seq(0, 100, by = 20),
    limits = c(0, 100),
    expand = c(0, 0) # no padding around the axis range
  ) +
  scale_y_continuous(
    breaks = seq(2, 8, by = 1),
    minor_breaks = seq(2, 8, by = 0.5),
    limits = c(2, 8),
    expand = c(0, 0) # no padding around the axis range
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4),
    # `linewidth` replaces the `size` argument deprecated in ggplot2 3.4.0.
    panel.grid.major = element_line(color = "beige", linewidth = 0.5),
    panel.grid.minor = element_line(color = "beige", linewidth = 0.25)
  )
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

 # geom_text_repel(aes(x = payroll, y = win, label=team), size=3)
# Multiple linear regression of the happiness score on six indicators, fitted
# on the row-level hap_data rather than on the per-country means used above.
final.fit.1 <- lm(
  formula = happiness_score ~ gdp_per_capita + health + freedom +
    generosity + gov_trust + social_support,
  data = hap_data
)

# Coefficient table and goodness-of-fit statistics for the combined model.
summary(final.fit.1)
## 
## Call:
## lm(formula = happiness_score ~ gdp_per_capita + health + freedom + 
##     generosity + gov_trust + social_support, data = hap_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.88535 -0.36630  0.04372  0.35278  1.39314 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.33227    0.07712  30.241  < 2e-16 ***
## gdp_per_capita  1.42153    0.08664  16.408  < 2e-16 ***
## health          1.30082    0.15062   8.636  < 2e-16 ***
## freedom         1.58766    0.16875   9.408  < 2e-16 ***
## generosity      0.85785    0.18438   4.653 3.84e-06 ***
## gov_trust       0.73820    0.21887   3.373  0.00078 ***
## social_support  0.03823    0.03620   1.056  0.29127    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5598 on 785 degrees of freedom
## Multiple R-squared:  0.7541, Adjusted R-squared:  0.7522 
## F-statistic: 401.3 on 6 and 785 DF,  p-value: < 2.2e-16
# Draw the first two lm diagnostic plots (residuals vs fitted, normal Q-Q).
plot(final.fit.1, which = 1:2)

# Scatter plot of the happiness score against the sum of the six indicators,
# coloured by continent, with a single pooled linear trend line.
# NOTE(review): `frame = year` is presumably what ggplotly() uses to build the
# per-year animation frames; confirm against the plotly documentation.
moving_plot_gdp <- ggplot(
  data = hap_data,
  aes(
    x = gdp_per_capita + health + freedom + generosity + gov_trust + social_support,
    y = happiness_score,
    frame = year,
    # Map the column by name instead of hap_data$continent so the mapping
    # follows the plot data and the legend/hover text get a clean variable name.
    color = continent
  )
) +
  geom_point() +
  # group = 0 pools every point into one fit instead of one fit per continent.
  stat_smooth(method = "lm", se = TRUE, color = "white", aes(group = 0)) +
  labs(
    title = "Happiness Score VS WHR Indicators",
    x = "Happiness Indicators",
    y = "Happiness ",
    # The mapped aesthetic is colour, so the legend title must be set on
    # `color`; a `fill` label has nothing to attach to.
    color = "Continent"
  ) +
  theme(
    panel.background = element_rect(fill = "#112333"),
    plot.title = element_text(hjust = 0.5, family = "against", size = 12),
    axis.title = element_text(hjust = 0.5, family = "against", size = 8),
    axis.text = element_text(hjust = 0.5, family = "against", size = 4)
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(moving_plot_gdp)
## `geom_smooth()` using formula = 'y ~ x'
# Load the survey responses. TRUE/FALSE are spelled out because the T/F
# shorthands are ordinary variables that can be reassigned.
wgyp <- fread("data/wgypHappy.csv", header = TRUE, stringsAsFactors = FALSE)

# Inspect the column names of the survey table.
names(wgyp)
##  [1] "timestamp"      "email"          "program"        "in_usa"        
##  [5] "gdp_per_capita" "health"         "freedom"        "generosity"    
##  [9] "gov_trust"      "social_support"
# Keep only the six indicator columns by dropping the respondent metadata.
# The columns are removed by name rather than by position so the result stays
# correct if the CSV column order ever changes.
wgyp_clean <- wgyp %>%
  select(-c(timestamp, email, program, in_usa))

# Confirm that only the model predictors remain.
names(wgyp_clean)
## [1] "gdp_per_capita" "health"         "freedom"        "generosity"    
## [5] "gov_trust"      "social_support"
# Predicted happiness score for every survey respondent from the combined
# model, divided by 10.
# NOTE(review): dividing the prediction by 10 also divides the model intercept.
# If the intent is to bring survey answers onto the scale of the training
# indicators, the predictors should be rescaled before predict() instead;
# confirm the survey's answer scale.
wgyp_prediction <- predict(final.fit.1, newdata = wgyp) / 10
wgyp_prediction
##         1         2         3         4         5         6         7         8 
## 4.2712389 5.4004642 4.3142264 5.0789877 0.8276564 3.4502691 3.6792132 4.8106825 
##         9        10        11        12        13        14        15        16 
## 4.3092866 3.6661205 5.1273201 4.3932857 4.6490500 4.7945651 4.7074604 3.3495892 
##        17        18        19        20        21        22        23        24 
## 5.3385238 2.9089960 4.6344644 3.6985214 4.7494914 4.6104437 5.7216430 4.2838838 
##        25        26        27        28        29        30        31        32 
## 3.1050383 4.1434699 5.3555405 3.9569297 4.4425588 4.4726997 3.8293803 2.9839383 
##        33        34        35        36        37        38        39        40 
## 5.2169286 4.6862640 3.7781641 4.7221960 4.5802769 3.2452608 4.0727016 5.3641337 
##        41        42        43        44        45        46        47        48 
## 4.3212359 2.9268739 4.3942319 3.2882601 4.5400056 5.3311314 4.9397015 2.9826032 
##        49        50        51        52        53        54        55        56 
## 5.2010340 4.0858011 2.9971318 4.7035151 3.1409546 4.8337188 4.2561842 4.0598598 
##        57        58        59        60        61        62        63        64 
## 2.7466891 4.6270038 4.8122404 4.0731540 3.4658190 3.9614710 3.7067258 4.0039275 
##        65        66        67        68        69        70        71        72 
## 3.0593500 4.6603407 3.9563126 3.5646947 4.8853171 3.9930690 3.7064659 3.6494922 
##        73        74        75        76        77        78        79        80 
## 3.7341241 3.4018144 3.8259488 4.2883937 3.8434070 3.1855746 2.6564948 6.1775197 
##        81        82        83        84        85        86        87        88 
## 3.8858008 2.3343596 4.7007702 3.2170191 3.5075306 2.4602394 4.0740464 4.2598788 
##        89        90        91        92        93        94        95        96 
## 4.1031132 3.5979791 4.7520493 4.7635529 3.1065962 4.0316651 4.5708387 4.7820793 
##        97        98        99       100       101       102       103       104 
## 3.6809625 3.8887941 3.1393162 4.9314691 5.5620374 2.4784469 4.3438466 2.9046932 
##       105       106       107       108       109       110       111       112 
## 3.7659079 4.5944271 4.7756174 4.6638819 3.5598625 3.8869328 3.1336316 3.7552932 
##       113       114       115       116       117       118       119       120 
## 3.6949890 5.8637961 3.8421158 3.8991318 5.4319557 2.8382497 3.9025296 4.5586608 
##       121       122       123       124       125       126       127 
## 3.3825480 4.3576829 3.9874907 4.3371407 3.6136455 4.5301997 4.5539540
# Summary of the rescaled predictions over all respondents: the average,
# then the largest and the smallest predicted score.
mean(wgyp_prediction)
## [1] 4.08445
max(wgyp_prediction)
## [1] 6.17752
min(wgyp_prediction)
## [1] 0.8276564
# Respondents enrolled in the "Data Science Academy" program.
wgyp_dsa <- filter(wgyp, program == "Data Science Academy")
# Model predictions for this subgroup, divided by 10 like the full-sample ones.
wgyp_pred_dsa <- predict(final.fit.1, newdata = wgyp_dsa) / 10
# Average, largest and smallest predicted score within the subgroup.
mean(wgyp_pred_dsa)
## [1] 4.284861
max(wgyp_pred_dsa)
## [1] 5.721643
min(wgyp_pred_dsa)
## [1] 0.8276564
# Respondents enrolled in the "Leadership in the Business World" program.
wgyp_lbw <- filter(wgyp, program == "Leadership in the Business World")
# Model predictions for this subgroup, divided by 10 like the full-sample ones.
wgyp_pred_lbw <- predict(final.fit.1, newdata = wgyp_lbw) / 10
# Average, largest and smallest predicted score within the subgroup.
mean(wgyp_pred_lbw)
## [1] 3.961103
max(wgyp_pred_lbw)
## [1] 6.17752
min(wgyp_pred_lbw)
## [1] 2.33436
# Respondents who answered "Yes" to the in_usa question.
wgyp_usa <- filter(wgyp, in_usa == "Yes")
# Model predictions for this subgroup, divided by 10 like the full-sample ones.
wgyp_pred_usa <- predict(final.fit.1, newdata = wgyp_usa) / 10
# Average, largest and smallest predicted score within the subgroup.
mean(wgyp_pred_usa)
## [1] 4.056982
max(wgyp_pred_usa)
## [1] 6.17752
min(wgyp_pred_usa)
## [1] 0.8276564
# Respondents who answered "No" to the in_usa question.
wgyp_nusa <- filter(wgyp, in_usa == "No")
# Model predictions for this subgroup, divided by 10 like the full-sample ones.
wgyp_pred_nusa <- predict(final.fit.1, newdata = wgyp_nusa) / 10
# Average, largest and smallest predicted score within the subgroup.
mean(wgyp_pred_nusa)
## [1] 4.138108
max(wgyp_pred_nusa)
## [1] 5.863796
min(wgyp_pred_nusa)
## [1] 2.478447
# NA removal: audit of missing values ----
# Drop the `V1` column before counting.
# NOTE(review): presumably a row-index column picked up when the CSV was
# read -- confirm.
country_data <- country_data %>% select(-V1)

# Total number of missing cells in the table
sum(is.na(country_data))
## [1] 77
# Missing cells per column; keep only the columns that have at least one
na_counts <- colSums(is.na(country_data))
has_missing <- na_counts > 0
na_cols <- country_data %>% select(all_of(names(na_counts)[has_missing]))
na.vals <- na_counts[has_missing]
rm(has_missing)

na.vals
##         agricultural_land                 land_area              armed_forces 
##                         2                         1                         3 
##                birth_rate          carbon_emissions                       cpi 
##                         2                         2                         3 
##                cpi_change            fertility_rate             forested_area 
##                         3                         2                         2 
##            gasoline_price                       gdp    primary_edu_enrollment 
##                         3                         1                         3 
##   tertiary_edu_enrollment          infant_mortality           life_expectancy 
##                         3                         2                         2 
##        maternal_mortality              minimum_wage pocket_health_expenditure 
##                         2                        20                         1 
##   physicians_per_thousand                population               labor_force 
##                         2                         1                         2 
##               tax_revenue                 total_tax         unemployment_rate 
##                         9                         2                         2 
##          urban_population 
##                         2
# Separate row positions by whether the row contains any missing value.
na_row_tf <- rowSums(is.na(country_data)) > 0  # TRUE where a row has >= 1 NA

na_rows <- which(na_row_tf)

# Flip the flag in place: from here on TRUE marks a complete row.
na_row_tf <- !na_row_tf
no_na_rows <- which(na_row_tf)

# Swap the index vector for the incomplete rows themselves and print them.
na_rows <- country_data[na_rows, ]
na_rows
##     happiness_score                country density agricultural_land land_area
##  1:           7.097                Austria     109              32.4     83871
##  2:           6.173                Bahrain    2239              11.1       765
##  3:           5.684                Bolivia      11              34.8   1098581
##  4:           5.633 Bosnia and Herzegovina      64              43.1     51197
##  5:           4.393               Cambodia      95              30.9    181035
##  6:           4.397                   Chad      13              39.7   1284000
##  7:           3.545                Comoros     467              71.5      2235
##  8:           6.130                 Cyprus     131              12.2      9251
##  9:           7.586                Denmark     137              62.0     43094
## 10:           5.559                Ecuador      71              22.2    283561
## 11:           4.170                  Egypt     103               3.8   1001450
## 12:           4.091               Ethiopia     115              36.3   1104300
## 13:           7.804                Finland      18               7.5    338145
## 14:           5.072                 Guinea      53              59.0    245857
## 15:           7.530                Iceland       3              18.7    103000
## 16:           6.405                  Italy     206              43.2    301340
## 17:           4.724             Mauritania       5              38.5   1030700
## 18:           5.722             Montenegro      47              19.0     13812
## 19:           4.631                Namibia       3              47.1    824292
## 20:           5.254        North Macedonia      83                NA     25713
## 21:           7.315                 Norway      15               2.7    323802
## 22:           6.265                 Panama      58              30.4     75420
## 23:           6.587              Singapore    8358               0.9       716
## 24:           5.275           South Africa      49              79.8   1219090
## 25:           4.908     State of Palestine     847                NA        NA
## 26:           7.395                 Sweden      25               7.4    450295
## 27:           7.240            Switzerland     219              38.4     41277
## 28:           6.014             Uzbekistan      79              62.9    447400
## 29:           5.211              Venezuela      32              24.5    912050
## 30:           3.204               Zimbabwe      38              41.9    390757
##     happiness_score                country density agricultural_land land_area
##     armed_forces birth_rate carbon_emissions     cpi cpi_change fertility_rate
##  1:        21000       9.70            61448  118.06        1.5           1.47
##  2:        19000      13.99            31694  117.59        2.1           1.99
##  3:        71000      21.75            21606  148.32        1.8           2.73
##  4:        11000       8.11            21848  104.90        0.6           1.27
##  5:       191000      22.46             9919  127.63        2.5           2.50
##  6:        35000      42.17             1016  117.70       -1.0           5.75
##  7:           NA      31.88              202  103.62       -4.3           4.21
##  8:        16000      10.46             6626  102.51        0.3           1.33
##  9:        15000      10.60            31786  110.35        0.8           1.73
## 10:        41000      19.72            41155  124.14        0.3           2.43
## 11:       836000      26.38           238560  288.57        9.2           3.33
## 12:       138000      32.34            14870  143.86       15.8           4.25
## 13:        25000       8.60            45871  112.33        1.0           1.41
## 14:        13000      36.36             2996  262.95        9.5           4.70
## 15:            0      12.00             2065  129.00        3.0           1.71
## 16:       347000       7.30           320411  110.62        0.6           1.29
## 17:        21000      33.69             2739  135.02        2.3           4.56
## 18:        12000      11.73             2017  116.32        2.6           1.75
## 19:        16000      28.64             4228  157.97        3.7           3.40
## 20:           NA         NA               NA      NA         NA             NA
## 21:        23000      10.40            41023  120.27        2.2           1.56
## 22:        26000      18.98            10715  122.07       -0.4           2.46
## 23:        81000       8.80            37535  114.41        0.6           1.14
## 24:        80000      20.51           476644  158.93        4.1           2.41
## 25:           NA         NA               NA      NA         NA             NA
## 26:        30000      11.40            43252  110.51        1.8           1.76
## 27:        21000      10.00            34477   99.55        0.4           1.52
## 28:        68000      23.30            91811      NA         NA           2.42
## 29:       343000      17.88           164175 2740.27      254.9           2.27
## 30:        51000      30.68            10983  105.51        0.9           3.62
##     armed_forces birth_rate carbon_emissions     cpi cpi_change fertility_rate
##     forested_area gasoline_price          gdp primary_edu_enrollment
##  1:          46.9           1.20 4.463147e+11                  103.1
##  2:           0.8           0.43 3.857407e+10                   99.4
##  3:          50.3           0.71 4.089532e+10                   98.2
##  4:          42.7           1.05 2.004785e+10                     NA
##  5:          52.9           0.90 2.708939e+10                  107.4
##  6:           3.8           0.78 1.131495e+10                   86.8
##  7:          19.7             NA 1.185729e+09                   99.5
##  8:          18.7           1.23 2.456465e+10                   99.3
##  9:          14.7           1.55 3.480780e+11                  101.3
## 10:          50.2           0.61 1.074357e+11                  103.3
## 11:           0.1           0.40 3.031751e+11                  106.3
## 12:          12.5           0.75 9.610766e+10                  101.0
## 13:          73.1           1.45 2.687612e+11                  100.2
## 14:          25.8           0.90 1.359028e+10                   91.5
## 15:           0.5           1.69 2.418804e+10                  100.4
## 16:          31.8           1.61 2.001244e+12                  101.9
## 17:           0.2           1.13 7.593752e+09                   99.9
## 18:          61.5           1.16 5.494737e+09                  100.0
## 19:           8.3           0.76 1.236653e+10                  124.2
## 20:            NA             NA 1.022078e+10                     NA
## 21:          33.2           1.78 4.033364e+11                  100.3
## 22:          61.9           0.74 6.680080e+10                   94.4
## 23:          23.1           1.25 3.720625e+11                  100.6
## 24:           7.6           0.92 3.514316e+11                  100.9
## 25:            NA             NA           NA                     NA
## 26:          68.9           1.42 5.308329e+11                  126.6
## 27:          31.8           1.45 7.030824e+11                  105.2
## 28:           7.5           1.03 5.792129e+10                  104.2
## 29:          52.7           0.00 4.823593e+11                   97.2
## 30:          35.5           1.34 2.144076e+10                  109.9
##     forested_area gasoline_price          gdp primary_edu_enrollment
##     tertiary_edu_enrollment infant_mortality life_expectancy maternal_mortality
##  1:                    85.1              2.9            81.6                  5
##  2:                    50.5              6.1            77.2                 14
##  3:                      NA             21.8            71.2                155
##  4:                    23.3              5.0            77.3                 10
##  5:                    13.7             24.0            69.6                160
##  6:                     3.3             71.4            54.0               1140
##  7:                     9.0             51.3            64.1                273
##  8:                    75.9              1.9            80.8                  6
##  9:                    80.6              3.6            81.0                  4
## 10:                    44.9             12.2            76.8                 59
## 11:                    35.2             18.1            71.8                 37
## 12:                     8.1             39.1            66.2                401
## 13:                    88.2              1.4            81.7                  3
## 14:                    11.6             64.9            61.2                576
## 15:                    71.8              1.5            82.7                  4
## 16:                    61.9              2.6            82.9                  2
## 17:                     5.0             51.5            64.7                766
## 18:                    56.1              2.3            76.8                  6
## 19:                    22.9             29.0            63.4                195
## 20:                      NA               NA              NA                 NA
## 21:                    82.0              2.1            82.8                  2
## 22:                    47.8             13.1            78.3                 52
## 23:                    84.8              2.3            83.1                  8
## 24:                    22.4             28.5            63.9                119
## 25:                      NA               NA              NA                 NA
## 26:                    67.0              2.2            82.5                  4
## 27:                    59.6              3.7            83.6                  5
## 28:                    10.1             19.1            71.6                 29
## 29:                    79.3             21.4            72.1                125
## 30:                    10.0             33.9            61.2                458
##     tertiary_edu_enrollment infant_mortality life_expectancy maternal_mortality
##     minimum_wage pocket_health_expenditure physicians_per_thousand population
##  1:           NA                      17.9                    5.17    8877067
##  2:           NA                      25.1                    0.93    1501635
##  3:         1.36                      25.9                    1.59   11513100
##  4:         1.04                      28.6                    2.16    3301000
##  5:           NA                      59.4                    0.17   16486542
##  6:         0.60                      56.4                    0.04   15946876
##  7:         0.71                      74.8                    0.27     850886
##  8:           NA                      43.9                    1.95    1198575
##  9:           NA                      13.7                    4.01    5818553
## 10:         2.46                      43.7                    2.04   17373662
## 11:           NA                      62.0                    0.45  100388073
## 12:           NA                      37.8                    0.08  112078730
## 13:           NA                      19.9                    3.81    5520314
## 14:           NA                      54.5                    0.08   12771246
## 15:           NA                      17.0                    4.08     361313
## 16:           NA                      22.8                    3.98   60297396
## 17:         0.53                      48.2                    0.19    4525696
## 18:         1.23                      31.8                    2.76     622137
## 19:           NA                       8.3                    0.42    2494530
## 20:           NA                      35.6                      NA    1836713
## 21:           NA                      14.3                    2.92    5347896
## 22:         1.53                      30.5                    1.57    4246439
## 23:           NA                      36.7                    2.29    5703569
## 24:           NA                       7.7                    0.91   58558270
## 25:           NA                        NA                      NA         NA
## 26:           NA                      15.2                    3.98   10285453
## 27:           NA                      28.3                    4.30    8574832
## 28:         0.24                      42.7                    2.37   33580650
## 29:         0.01                      45.8                    1.92   28515829
## 30:           NA                      25.8                    0.21   14645468
##     minimum_wage pocket_health_expenditure physicians_per_thousand population
##     labor_force tax_revenue total_tax unemployment_rate urban_population
##  1:        60.7        25.4      51.4              4.67          5194416
##  2:        73.4         4.2      13.8              0.71          1467109
##  3:        71.8        17.0      83.7              3.50          8033035
##  4:        46.4        20.4      23.7             18.42          1605144
##  5:        82.3        17.1      23.1              0.68          3924621
##  6:        70.7          NA      63.5              1.89          3712273
##  7:        43.3          NA     219.6              4.34           248152
##  8:        63.1        24.5      22.4              7.27           800708
##  9:        62.2        32.4      23.8              4.91          5119978
## 10:        68.0          NA      34.4              3.97         11116711
## 11:        46.4        12.5      44.4             10.76         42895824
## 12:        79.6         7.5      37.7              2.08         23788710
## 13:        59.1        20.8      36.6              6.59          4716888
## 14:        61.5        10.8      69.3              4.30          4661505
## 15:        75.0        23.3      31.9              2.84           339110
## 16:        49.6        24.3      59.1              9.89         42651966
## 17:        45.9          NA      67.0              9.55          2466821
## 18:        54.4          NA      22.2             14.88           417765
## 19:        59.5        27.1      20.7             20.27          1273258
## 20:          NA          NA        NA                NA               NA
## 21:        63.8        23.9      36.2              3.35          4418218
## 22:        66.6          NA      37.2              3.90          2890084
## 23:        70.5        13.1      21.0              4.11          5703569
## 24:        56.0        27.5      29.2             28.18         39149717
## 25:          NA          NA        NA                NA               NA
## 26:        64.6        27.9      49.1              6.48          9021165
## 27:        68.3        10.1      28.8              4.58          6332428
## 28:        65.1        14.8      31.6              5.92         16935729
## 29:        59.7          NA      73.3              8.80         25162368
## 30:        83.1        20.7      31.6              4.95          4717305
##     labor_force tax_revenue total_tax unemployment_rate urban_population
##       latitude  longitude
##  1:  47.516231  14.550072
##  2:  26.066700  50.557700
##  3: -16.290154 -63.588653
##  4:  43.915886  17.679076
##  5:  12.565679 104.990963
##  6:  15.454166  18.732207
##  7: -11.645500  43.333300
##  8:  35.126413  33.429859
##  9:  56.263920   9.501785
## 10:  -1.831239 -78.183406
## 11:  26.820553  30.802498
## 12:   9.145000  40.489673
## 13:  61.924110  25.748151
## 14:   9.945587  -9.696645
## 15:  64.963051 -19.020835
## 16:  41.871940  12.567380
## 17:  21.007890 -10.940835
## 18:  42.708678  19.374390
## 19: -22.957640  18.490410
## 20:  41.608635  21.745275
## 21:  60.472024   8.468946
## 22:   8.537981 -80.782127
## 23:   1.352083 103.819836
## 24: -30.559482  22.937506
## 25:  31.952162  35.233154
## 26:  60.128161  18.643501
## 27:  46.818188   8.227512
## 28:  41.377491  64.585262
## 29:   6.423750 -66.589730
## 30: -19.015438  29.154857
##       latitude  longitude
# Keep only the rows whose positions are listed in `no_na_rows`
# (the rows without any missing value).
no_na_country_data <- country_data[no_na_rows, ]

# Sanity check: the filtered table should contain no missing cells.
sum(is.na(no_na_country_data))
## [1] 0
# This uses every variable
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
set.seed(7)

# Split the data into predictors (X) and target variable (Y).
# `country` is a text identifier, not a measurement. If it stays in X,
# randomForest() coerces it to integer codes via data.matrix() and treats the
# alphabetical rank of the country name as a numeric predictor, so it is
# dropped together with the target.
# NOTE: results printed after this chunk in the rendered file were produced
# with `country` still in X; re-run to refresh them.
X <- no_na_country_data %>% select(-happiness_score, -country)
Y <- no_na_country_data$happiness_score

# Split the data into 70/30 training/testing sets
train_index <- createDataPartition(Y, p = 0.7, list = FALSE)
train_data <- X[train_index, ]
test_data <- X[-train_index, ]
train_target <- Y[train_index]
test_target <- Y[-train_index]

# Create a random forest model on the training split (100 trees, default mtry)
rf_model <- randomForest(x = train_data, y = train_target, ntree = 100, importance = TRUE, proximity = TRUE)


# Cross Validation
ctrl <- trainControl(method = "cv", number = 5)  # 5-fold
# Cross-validate a random forest on the full X/Y. This is a separate fit from
# `rf_model`: mtry is fixed at 2 here, while `rf_model` uses the default mtry.
cv_results <- train(x = X, y = Y, method = "rf", trControl = ctrl, tuneGrid = expand.grid(mtry = 2), preProcess = c("center", "scale"))


# Print the summary of the random forest model
rf_model
## 
## Call:
##  randomForest(x = train_data, y = train_target, ntree = 100, importance = TRUE,      proximity = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 100
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 0.5575185
##                     % Var explained: 51.16
cv_results
## Random Forest 
## 
## 103 samples
##  29 predictor
## 
## Pre-processing: centered (28), scaled (28), ignore (1) 
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 82, 83, 83, 82, 82 
## Resampling results:
## 
##   RMSE       Rsquared   MAE      
##   0.7093225  0.6263857  0.5017048
## 
## Tuning parameter 'mtry' was held constant at a value of 2
# Plot the fitted forest object
plot(x = rf_model)

# Predict the held-out rows and plot the predictions against the actual values

predictions <- predict(object = rf_model, newdata = test_data)
plot(x = test_target, y = predictions)

# Variable-importance plot for the fitted forest
varImpPlot(x = rf_model)

# This uses only the variables significant to 95%
library(randomForest)
library(caret)

set.seed(7)

# Predictors retained for the reduced model
significant_columns <- c(
  "armed_forces", "forested_area", "tertiary_edu_enrollment",
  "life_expectancy", "maternal_mortality", "minimum_wage",
  "unemployment_rate", "urban_population", "longitude"
)

# Predictors (X) restricted to those columns; the target (Y) is the
# happiness score
X <- no_na_country_data %>% select(all_of(significant_columns))
Y <- no_na_country_data[["happiness_score"]]

# 70/30 training/testing split
train_index <- createDataPartition(y = Y, p = 0.7, list = FALSE)
train_data <- X[train_index, ]
train_target <- Y[train_index]
test_data <- X[-train_index, ]
test_target <- Y[-train_index]


# Random forest fitted on the training split (100 trees)
rf_model <- randomForest(
  x = train_data, y = train_target,
  ntree = 100, importance = TRUE, proximity = TRUE
)


# 5-fold cross-validation setup
ctrl <- trainControl(method = "cv", number = 5)
# Cross-validated random forest on the full X/Y with mtry fixed at 2
cv_results <- train(
  x = X, y = Y, method = "rf",
  trControl = ctrl,
  tuneGrid = expand.grid(mtry = 2),
  preProcess = c("center", "scale")
)

# Print the fitted random forest
rf_model
## 
## Call:
##  randomForest(x = train_data, y = train_target, ntree = 100, importance = TRUE,      proximity = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 100
## No. of variables tried at each split: 3
## 
##           Mean of squared residuals: 0.6707164
##                     % Var explained: 41.24
cv_results
## Random Forest 
## 
## 103 samples
##   9 predictor
## 
## Pre-processing: centered (9), scaled (9) 
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 82, 83, 83, 83, 81 
## Resampling results:
## 
##   RMSE       Rsquared   MAE      
##   0.7488524  0.5818441  0.5404693
## 
## Tuning parameter 'mtry' was held constant at a value of 2
# Plot the fitted forest object
plot(x = rf_model)

# Predict the held-out rows and plot the predictions against the actual values

predictions <- predict(object = rf_model, newdata = test_data)
plot(x = test_target, y = predictions)

# Variable-importance plot for the fitted forest
varImpPlot(x = rf_model)

library(tree)
library(randomForest)
library(caret)

set.seed(7)


# 70/30 training/testing split; X and Y are reused from the preceding step
train_index <- createDataPartition(y = Y, p = 0.7, list = FALSE)
train_data <- X[train_index, ]
test_data <- X[-train_index, ]
test_target <- Y[-train_index]
# The training response is stored as `happiness_score` so that cbind() gives
# the column that name, which the model formula below refers to.
happiness_score <- Y[train_index]
train <- cbind(happiness_score, train_data)

# Fit a decision tree of the response on all predictor columns
fit1 <- tree(formula = happiness_score ~ ., data = train)

# Show the fitted splits
print(fit1)
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 75 85.6200 5.527  
##    2) minimum_wage < 2.2 53 43.7900 5.070  
##      4) tertiary_edu_enrollment < 28.3 30 27.1600 4.653  
##        8) forested_area < 24.6 10  4.5330 4.084  
##         16) forested_area < 8.75 5  0.4065 4.453 *
##         17) forested_area > 8.75 5  2.7670 3.715 *
##        9) forested_area > 24.6 20 17.7700 4.938  
##         18) maternal_mortality < 498.5 15 10.3600 5.242  
##           36) minimum_wage < 0.465 9  4.5420 4.831 *
##           37) minimum_wage > 0.465 6  2.0170 5.858 *
##         19) maternal_mortality > 498.5 5  1.8600 4.025 *
##      5) tertiary_edu_enrollment > 28.3 23  4.6440 5.613  
##       10) unemployment_rate < 8.805 13  0.9247 5.823 *
##       11) unemployment_rate > 8.805 10  2.4030 5.340  
##         22) forested_area < 22.4 5  0.4960 5.023 *
##         23) forested_area > 22.4 5  0.9016 5.657 *
##    3) minimum_wage > 2.2 22  4.0830 6.628  
##      6) minimum_wage < 7.175 14  1.0690 6.377 *
##      7) minimum_wage > 7.175 8  0.5869 7.067 *
# Draw the fitted tree and add its text labels
plot(x = fit1)
text(x = fit1, srt = 45, cex = 0.5)

# Predict the held-out rows
test_predictions <- predict(object = fit1, newdata = test_data)

# Test-set RMSE and R-squared (1 - SSE / SST)
test_rmse <- sqrt(mean((test_predictions - test_target)^2))
test_r_squared <- 1 -
  sum((test_predictions - test_target)^2) /
    sum((test_target - mean(test_target))^2)

cat("Testing RMSE:", test_rmse, "\n")
## Testing RMSE: 0.7848045
cat("Testing R-squared:", test_r_squared, "\n")
## Testing R-squared: 0.5428855